The deep learning activation functions examined below are ReLU, Leaky ReLU, tanh, and sigmoid.


In [19]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
z = np.linspace(-5,5,num=1000)

Create a plot drawing function, used for all of the activation plots below.


In [32]:
def draw_activation_plot(a,quadrants=2,y_ticks=[0],y_lim=[0,5]):     
    #Create figure and axis
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    #Move left axis  
    ax.spines['left'].set_position('center')
    

    # Remove top and right axes
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')

    #Set x and y labels
    plt.xlabel('z')
    plt.ylabel('a')
    
    #Set ticks
    plt.xticks([])
    plt.yticks(y_ticks)
    
    #Set ylim
    plt.ylim(y_lim)
    
    #4 Quadrant conditions
    if quadrants==4:
        #Move bottom axis
        ax.spines['bottom'].set_position('center')
        #Move x and y labels
        ax.yaxis.set_label_coords(.48,.75)
        ax.xaxis.set_label_coords(.75,.48)



    plt.plot(z,a);

ReLU

A great default choice for hidden layers. It is frequently used in industry and is almost always adequate to solve a problem: its gradient is 1 for positive inputs, so it does not saturate the way sigmoid and tanh do.


In [33]:
relu = np.maximum(z,0)
draw_activation_plot(relu)
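
To make the non-saturating claim concrete, here is a minimal sketch (not part of the original cells) of the ReLU derivative on the same z grid: 1 for positive z and 0 otherwise.

In [ ]:
# Sketch: ReLU derivative on the z grid defined above (assumed convention: derivative taken as 0 at z = 0)
relu_grad = (z > 0).astype(float)
draw_activation_plot(relu_grad, y_ticks=[0, 1], y_lim=[0, 1.1])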




Leaky ReLU

Can help with the "dying ReLU" problem: negative inputs get a small slope (0.01 here) instead of a zero gradient, so those units can still update during training.


In [24]:
leaky_ReLU = np.maximum(0.01*z,z)

draw_activation_plot(leaky_ReLU, quadrants=4, y_ticks=[0], y_lim=[-5,5])
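
As a small sketch added here (not from the original run), the piecewise gradient makes the point numerically: it is 0.01 rather than 0 for negative z, so it never vanishes entirely.

In [ ]:
# Sketch: leaky ReLU gradient on the same z grid; 0.01 for z < 0, 1 for z > 0
leaky_grad = np.where(z > 0, 1.0, 0.01)
print(leaky_grad.min(), leaky_grad.max())   # never exactly zero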


tanh

Usually works better than sigmoid for hidden layers because its output is zero-centered (between -1 and 1), which keeps the inputs to the next layer roughly centered.


In [23]:
tanh = (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))
draw_activation_plot(tanh, quadrants=4, y_ticks=[-1,0,1], y_lim=[-1.1,1.1])
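
A quick numerical check (a sketch added here, not an original cell): for inputs symmetric around 0, tanh activations average to roughly 0, whereas sigmoid activations average to about 0.5.

In [ ]:
# Sketch: mean activation over the symmetric z grid; tanh is zero-centered, sigmoid is not
print(np.mean(tanh))              # approximately 0
print(np.mean(1/(1+np.exp(-z))))  # approximately 0.5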


sigmoid

Almost never used in hidden layers, since it saturates for large |z| and its output is not zero-centered; its main remaining use is the output layer for binary classification, where the output can be read as a probability.


In [21]:
sigmoid = 1/(1+np.exp(-z))

draw_activation_plot(sigmoid, y_ticks=[0,0.5,1], y_lim=[0,1.05])
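
To illustrate the saturation point, a minimal sketch (added here) of the standard derivative sigma'(z) = sigma(z)(1 - sigma(z)), which peaks at 0.25 and vanishes for large |z|:

In [ ]:
# Sketch: sigmoid derivative; maximum value is 0.25 at z = 0, near 0 in the tails
sigmoid_grad = sigmoid*(1-sigmoid)
print(sigmoid_grad.max())   # ~0.25
draw_activation_plot(sigmoid_grad, y_ticks=[0, 0.25], y_lim=[0, 0.3])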



Sigmoid as a classifier output: the dashed line below marks the 0.5 prediction cut-off, so inputs with z above 0 are classified as 1 and those below as 0.

In [72]:
#Compute the sigmoid output
sigma = 1/(1+np.exp(-z))

#Draw prediction cut-off line
plt.axhline(0.5, color='black', ls='--')

#Label axes
plt.xlabel('z')
plt.ylabel(r'$\hat{y}$')

#Hide x ticks and plot the curve
plt.tick_params(axis='x', bottom=False, labelbottom=False)
plt.plot(z, sigma, '-', lw=3);
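
To close the loop, a minimal sketch (using the usual 0.5 convention, not an original cell) of turning these outputs into hard 0/1 predictions:

In [ ]:
# Sketch: hard class predictions from sigmoid outputs using the 0.5 cut-off
y_hat = (sigma >= 0.5).astype(int)
print(y_hat[:3], y_hat[-3:])   # 0s for very negative z, 1s for very positive z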


